#ifndef PREPROCESS_H
#define PREPROCESS_H
#include <vector>
#include <string>
#include <map>
#include <algorithm>
#include <iostream>
#include <fstream>
#include "tokenizer.h"
#include "object.h"
using namespace std;

class Preprocess
{
public:
    Preprocess(string set1_name, string set2_name);

    vector<Object> set1_dest;   //resulting objects
    vector<Object> set2_dest;

    void readData(string set1_name, string set2_name);        //read from a file into set1_source, set2_source
    void transformObjects();         //transform source strings into resulting objects

private:
    vector<string> set1_source;     //1 string = 1 object, last token - score
    vector<string> set2_source;

    map<pair<string,int>, int> word_token_map;  //map word to corresponding token
    map<pair<string,int>, int> word_freq_map;   //map word to its document frequency

    void addObjectTokens(string str_object, int set_number);    //add tokens of 1 object to the maps
    //count frequency of each word in 1 object
    void countWordFreq(vector<string> words, map<string,int>& word_object_map);
    //add words frequency of 1 object to common map
    void mergeMaps(map<string, int>& word_object_map, Object& tokens_object);
};

#endif // PREPROCESS_H
